# coding:utf-8


from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Scheme + host of the target site; prepended to the page path below and to
# every href collected by getNews.
ROOTURL = "https://b2b.10086.cn"
# Substrings looked for in link text by checkKeys; a link is kept when its
# text contains at least one of them.
KEYS = ['河南','在线','移动','中移']

def getLinks(url):
    """Render a page in PhantomJS and return its ``jtgs_table`` tables.

    Args:
        url: Address of the page to load.

    Returns:
        The ``<table class="jtgs_table">`` elements found in the rendered
        page source, or an empty list when the page could not be loaded or
        parsed.
    """
    # Both names are bound up front so the cleanup and the return below stay
    # valid even when the browser fails to start or the page fails to load.
    tables = []
    driver = None
    try:
        driver = webdriver.PhantomJS()
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        tables = soup.find_all("table", {"class": "jtgs_table"})
    except Exception:
        # Best-effort scrape: report the failure and fall through to return
        # the empty list instead of crashing the caller.
        print('没网了')
    finally:
        # quit() ends the whole browser session, so a separate close() first
        # is unnecessary (and would skip quit() if it raised).
        if driver is not None:
            driver.quit()
    # Returning after the finally block (not inside it) lets non-Exception
    # signals such as KeyboardInterrupt propagate instead of being discarded.
    return tables


def getNews(tables):
    """Collect the links whose text matches one of the configured keywords.

    Visits every ``<a>`` found under a ``<td>`` under a ``<tr>`` of each
    given table and keeps those whose text passes ``checkKeys``.

    Args:
        tables: Iterable of parsed table elements, e.g. the value returned
            by ``getLinks``.

    Returns:
        A list of two-item lists ``[ROOTURL + href, link_text]`` in the
        order the anchors were visited. Anchors without an ``href``
        attribute are skipped.
    """
    result = []
    for table in tables:
        for tr in table.find_all('tr'):
            for td in tr.find_all('td'):
                for a in td.find_all('a'):
                    text = a.get_text()
                    if not checkKeys(text):
                        continue
                    href = a.get('href')
                    # A matching anchor with no href has nothing to link to;
                    # skip it rather than abort the whole scan with KeyError.
                    if href is None:
                        continue
                    result.append([ROOTURL + href, text])
    return result


def checkKeys(text):
    """Return True when ``text`` contains at least one entry of ``KEYS``.

    Args:
        text: String to search.

    Returns:
        True if any keyword occurs as a substring of ``text``, else False.
    """
    return any(keyword in text for keyword in KEYS)
    

# Script body: these statements run whenever the module is executed or
# imported. They load the portal's pre-index page, extract its listing
# tables, and keep the keyword-matching links in `result`.
url = ROOTURL + '/b2b/main/preIndex.html'
tables = getLinks(url)
# Each entry of `result` is a two-item list: [link URL, link text].
result = getNews(tables)




